Observed data

Observed log-chlorophyll at representative station for the St. Lucie Estuary

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')

# flow data, left moving window average of 30 days
data(sl_fldat)
fl_dat <- sl_fldat %>% 
  rename(date = Date) %>% 
  mutate(
    qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
  )

# format the data to model
data(sl_dat)
sl_mod <- sl_dat %>%
  rename(date = Date) %>% 
  mutate(
    doy = yday(date), 
    dec_time = decimal_date(date), 
    yr = year(date),
    yr = factor(yr),
    mo = month(date, label = T)
  ) %>% 
  left_join(fl_dat, by = 'date') %>% 
  mutate(
    flo = log(qsm),
    chl = log(1 + chl)
    ) %>% 
  select(-q, -qsm, -sal)

# plot, all
p <- ggplot(sl_mod, aes(x = date, y = chl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sl_mod, aes(x = yr, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sl_mod, aes(x = mo, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Adding nitrogen and turbidity covariates

# formula for best annual, seasonal, flow model
strt <- best2$formula %>% 
  as.character

smths <- c(
  "s(nh, bs = 'tp')",  
  "s(tss, bs = 'tp')",
  "te(nh, tss, bs = c('tp', 'tp'))"
)

# get all combinations of smoothers to model, one to many
frmstab <- list()
frms <- list()
for(i in seq_along(smths)){
  
  # for the summary table  
  frmtab <- combn(smths, i) %>%
    apply(2, function(x){
      paste(x, collapse = ' + ')
      })
  
  # for the model
  frm <- sapply(frmtab, function(x){  
        paste('chl ~', strt[3], '+', x) %>% 
          formula
        })
  
  frmstab <- c(frmstab, frmtab)
  frms <- c(frms, frm)
 
}

# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
  
  gam(frm, 
    knots = list(doy = c(1, 366)),
    data = sl_mod, 
    na.action = na.exclude
  )

})
names(mods3) <- paste0('mod', seq_along(mods3))

Summary of all nutrient, turbidity models

# smoother stats of GAMs
map(mods3, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
mod1 s(dec_time) 7.1522980 8.144604 2.2843078 0.0226542
mod1 s(doy) 4.4270740 8.000000 4.7050151 0.0000000
mod1 s(flo) 3.5483178 4.353752 2.7567268 0.0253103
mod1 te(flo,doy) 0.8014227 15.000000 0.0725558 0.0790987
mod1 te(flo,dec_time) 2.0738102 16.000000 0.2060798 0.0478041
mod1 te(dec_time,doy) 0.0000009 15.000000 0.0000000 0.6596548
mod1 te(dec_time,doy,flo) 7.4985802 48.000000 0.3037391 0.0030504
mod1 s(nh) 3.1140722 3.791727 8.7739122 0.0000024
mod2 s(dec_time) 7.3595891 8.301880 1.7741030 0.0781964
mod2 s(doy) 3.7288636 8.000000 2.1351290 0.0000375
mod2 s(flo) 6.4627017 7.626692 1.3052852 0.1888331
mod2 te(flo,doy) 0.2021979 15.000000 0.0149169 0.1829650
mod2 te(flo,dec_time) 3.5399133 16.000000 0.6739463 0.0029717
mod2 te(dec_time,doy) 0.0000039 15.000000 0.0000001 0.6162590
mod2 te(dec_time,doy,flo) 9.4311894 48.000000 0.3786696 0.0037678
mod2 s(tss) 4.0133779 4.921622 3.5508373 0.0044754
mod3 s(dec_time) 8.0417018 8.725110 2.7393263 0.0051412
mod3 s(doy) 4.4082508 8.000000 5.7139093 0.0000000
mod3 s(flo) 1.0000209 1.000033 5.8789383 0.0159194
mod3 te(flo,doy) 0.7171574 15.000000 0.0955845 0.0138524
mod3 te(flo,dec_time) 3.8664969 16.000000 1.2835237 0.0000002
mod3 te(dec_time,doy) 0.0000014 15.000000 0.0000001 0.5863846
mod3 te(dec_time,doy,flo) 4.4251546 48.000000 0.1510152 0.0217732
mod3 te(nh,tss) 10.9862923 12.320136 5.5396180 0.0000000
mod4 s(dec_time) 7.8748101 8.634556 2.2810759 0.0215738
mod4 s(doy) 4.3273103 8.000000 5.6933609 0.0000000
mod4 s(flo) 4.6583896 5.686054 2.8227241 0.0111382
mod4 te(flo,doy) 0.0283851 15.000000 0.0033107 0.0968418
mod4 te(flo,dec_time) 2.6963868 16.000000 0.3558319 0.0125090
mod4 te(dec_time,doy) 0.0000014 15.000000 0.0000001 0.6166412
mod4 te(dec_time,doy,flo) 6.0892601 48.000000 0.2285051 0.0098800
mod4 s(nh) 3.3583330 4.074465 9.0342260 0.0000006
mod4 s(tss) 4.5158071 5.487045 3.8275540 0.0016388
mod5 s(dec_time) 8.0484777 8.729073 2.7767507 0.0047801
mod5 s(doy) 4.3976198 8.000000 5.3574757 0.0000000
mod5 s(flo) 1.0000029 1.000005 5.6587753 0.0180052
mod5 te(flo,doy) 0.9753113 15.000000 0.1244896 0.0173888
mod5 te(flo,dec_time) 3.9431131 16.000000 1.2111145 0.0000004
mod5 te(dec_time,doy) 0.0000025 15.000000 0.0000001 0.6013498
mod5 te(dec_time,doy,flo) 4.3699401 48.000000 0.1461156 0.0242046
mod5 s(nh) 3.6223225 4.509054 5.9038455 0.0000760
mod5 te(nh,tss) 9.8228824 20.000000 1.5171723 0.0001388
mod6 s(dec_time) 7.8567156 8.626056 2.4772317 0.0121889
mod6 s(doy) 4.2434324 8.000000 5.0031988 0.0000000
mod6 s(flo) 3.3138514 4.071771 3.5596082 0.0069834
mod6 te(flo,doy) 0.2255661 15.000000 0.0291191 0.0744306
mod6 te(flo,dec_time) 2.5038558 16.000000 0.3256927 0.0122653
mod6 te(dec_time,doy) 0.0000007 15.000000 0.0000000 0.6201662
mod6 te(dec_time,doy,flo) 6.3129187 48.000000 0.2404234 0.0083718
mod6 s(tss) 4.5950839 5.576246 3.6017473 0.0024819
mod6 te(nh,tss) 2.2494458 14.000000 2.7791025 0.0000000
mod7 s(dec_time) 7.9410958 8.671029 2.5158647 0.0107913
mod7 s(doy) 4.3386360 8.000000 4.7255731 0.0000000
mod7 s(flo) 3.1281958 3.841844 3.4897305 0.0089717
mod7 te(flo,doy) 0.3594447 15.000000 0.0451695 0.0808500
mod7 te(flo,dec_time) 2.5392626 16.000000 0.3378472 0.0106254
mod7 te(dec_time,doy) 0.0000004 15.000000 0.0000000 0.6355450
mod7 te(dec_time,doy,flo) 5.9550492 48.000000 0.2262731 0.0089427
mod7 s(nh) 1.0000004 1.000001 0.0085306 0.9264745
mod7 s(tss) 4.4162128 5.369065 3.7807464 0.0019287
mod7 te(nh,tss) 2.1696083 15.000000 1.7236509 0.0000000
# summary stats of GAMs
map(mods3, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  mutate(smth_added = frmstab) %>% 
  select(name, smth_added, everything()) %>% 
  kable
name smth_added AIC R2
mod1 s(nh, bs = ‘tp’) 497.8121 0.3680850
mod2 s(tss, bs = ‘tp’) 534.1996 0.3471245
mod3 te(nh, tss, bs = c(‘tp’, ‘tp’)) 481.6626 0.4065122
mod4 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) 481.3778 0.4071965
mod5 s(nh, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 481.8319 0.4105542
mod6 s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 478.9757 0.4079437
mod7 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 479.7727 0.4073784

Summary stats of best three three models:

# best model with season, year, flow
best3 <- map(mods3, ~ summary(.x)$r.sq) %>% 
  unlist %>% 
  which.max %>% 
  mods3[[.]] 

best <- list(best1 = best1, best2 = best2, best3 = best3)

# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
best1 s(dec_time) 1.0000001 1.000000 10.6360338 0.0012203
best1 s(doy) 4.1645525 8.000000 5.1577131 0.0000000
best1 te(dec_time,doy) 2.8523113 15.000000 0.2807338 0.1611397
best2 s(dec_time) 1.0000008 1.000001 1.9293923 0.1658109
best2 s(doy) 2.9402431 8.000000 0.7092063 0.0456631
best2 s(flo) 6.7839630 7.863753 2.9627043 0.0049629
best2 te(flo,doy) 2.9616367 12.000000 0.5845557 0.0124682
best2 te(flo,dec_time) 0.2050133 15.000000 0.0193067 0.0936091
best2 te(dec_time,doy) 0.0000023 15.000000 0.0000001 0.8125709
best2 te(dec_time,doy,flo) 19.8642287 48.000000 1.0091690 0.0000325
best3 s(dec_time) 8.0484777 8.729073 2.7767507 0.0047801
best3 s(doy) 4.3976198 8.000000 5.3574757 0.0000000
best3 s(flo) 1.0000029 1.000005 5.6587753 0.0180052
best3 te(flo,doy) 0.9753113 15.000000 0.1244896 0.0173888
best3 te(flo,dec_time) 3.9431131 16.000000 1.2111145 0.0000004
best3 te(dec_time,doy) 0.0000025 15.000000 0.0000001 0.6013498
best3 te(dec_time,doy,flo) 4.3699401 48.000000 0.1461156 0.0242046
best3 s(nh) 3.6223225 4.509054 5.9038455 0.0000760
best3 te(nh,tss) 9.8228824 20.000000 1.5171723 0.0001388
# summary stats of GAMs
map(best, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
best1 608.6850 0.1711443
best2 556.8277 0.3351218
best3 481.8319 0.4105542
# predictions
sl_res3 <- map(best, function(x){
  sl_mod %>% 
    mutate(
      pred = predict(x)
    )
  }) %>% 
  enframe('mods') %>% 
  unnest

# plot
p <- ggplot(sl_res3, aes(x = date)) + 
  geom_point(data = sl_mod, aes(y = chl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)